Objective: To visualise how the occupations have changed across the three time periods of 1850, 1880 and 1910.

The graphing process revealed inconsistencies in the coding of `occstr` (for example, "DRESSMAKER" versus "DRESS MAKER"), which need to be addressed.

1. Set Up

README

1850

library(dplyr)
library(ggplot2)
library(treemap)
library(extrafont)
library(stringr)
loadfonts()
library(knitr)
library(kableExtra)
library(tidyr)

# Loading
# Each borough file is tagged with its city so the rows stay identifiable
# once the two tables are stacked.
mn_1850 <- readr::read_csv("../Data/census_1850_occ_mn.csv") %>%
  mutate(city = "Manhattan")
bk_1850 <- readr::read_csv("../Data/census_1850_occ_bk.csv") %>%
  mutate(city = "Brooklyn")
# Lookup table joined below on its `code` column.
OCC_1880 <- readr::read_csv("../Data/OCC1880.csv")
# OCC1950.csv is intentionally not read here: nothing in the 1850 section
# uses it, and the 1910 section reads the file itself.

# Combining Data
# Stack the boroughs, move the key columns to the front, attach the OCC1880
# lookup (occ -> code), then convert the coded columns to labelled factors.
# Any code that is not listed in `levels` becomes NA in the factor.
combined_1850 <- rbind(mn_1850, bk_1850) %>%
  select(age, sex, marst, race, labforce,
         occ, city, everything()) %>%
  left_join(OCC_1880, by = c("occ" = "code")) %>%
  mutate(
    # Code 120 is labelled "Blank (white)"; the earlier "Black (white)" label
    # was a typo. NOTE(review): confirm against the IPUMS detailed race
    # codebook, and update any downstream filter that matched the old text.
    race = factor(race,
                  levels = c(100, 120, 200, 210, 300),
                  labels = c("White", "Blank (white)",
                             "Black / African American",
                             "Mulatto", "American Indian")),
    labforce = factor(labforce,
                      levels = c(0, 1, 2),
                      labels = c("N/A",
                                 "No, not in the labor force",
                                 "Yes, in the labor force")),
    sex = factor(sex,
                 levels = c(1, 2),
                 labels = c("Male", "Female")),
    marst = factor(marst,
                   levels = 1:6,
                   labels = c("Married, spouse present",
                              "Married, spouse absent",
                              "Separated",
                              "Divorced",
                              "Widowed",
                              "Never married/single"))
  )

# Theme setting for visualisations
# Global ggplot2 default: minimal theme, no minor grid lines, black Arial text.
theme_set(
  theme_minimal() +
    theme(panel.grid.minor = element_blank(),
          text = element_text(family = "Arial",
                              colour = "black"))
)
# Two-colour palettes; presumably for the sex and missing-value plots, whose
# code is outside this section.
col_sex <- c("#e21737", "#0b648f")
col_miss <- c("#86b817", "#e53238")

# Saving Memory
# The inputs are no longer needed once combined_1850 has been built.
rm(OCC_1880, bk_1850, mn_1850)


1880

# Loading
# Read the two borough extracts, labelling each with its city, plus the
# OCC1880 lookup table.
mn_1880 <- mutate(readr::read_csv("../Data/census_1880_occ_mn.csv"),
                  city = "Manhattan")
bk_1880 <- mutate(readr::read_csv("../Data/census_1880_occ_bk.csv"),
                  city = "Brooklyn")
OCC_1880 <- readr::read_csv("../Data/OCC1880.csv")

# Combining Data
# Stack the boroughs, bring the key columns to the front, attach the lookup
# (occ -> code), and make `age` numeric. The text value
# "Less than 1 year old" is mapped to 0 before the conversion.
combined_1880 <- rbind(mn_1880, bk_1880) %>%
  relocate(age, sex, race, labforce, occ, city) %>%
  left_join(OCC_1880, by = c("occ" = "code")) %>%
  mutate(
    age = as.numeric(ifelse(age == "Less than 1 year old", 0, age))
  )

# Drop the inputs now that combined_1880 exists.
rm(OCC_1880, bk_1880, mn_1880)


1910

# Loading
# Read the two borough extracts, labelling each with its city, plus the
# OCC1950 lookup table.
mn_1910 <- mutate(readr::read_csv("../Data/census_1910_occ_mn.csv"),
                  city = "Manhattan")
bk_1910 <- mutate(readr::read_csv("../Data/census_1910_occ_bk.csv"),
                  city = "Brooklyn")
OCC_1950 <- readr::read_csv("../Data/OCC1950.csv")

# Combining Data
# Stack the boroughs, attach the lookup (occ1950 -> code), then turn the
# coded sex and labor_force columns into labelled factors.
combined_1910 <- rbind(mn_1910, bk_1910) %>%
  left_join(OCC_1950, by = c("occ1950" = "code")) %>%
  mutate(
    sex = factor(sex, levels = c(1, 2), labels = c("Male", "Female")),
    labor_force = factor(
      labor_force,
      levels = c(0, 1, 2),
      labels = c("N/A",
                 "No, not in the labor force",
                 "Yes, in the labor force")
    )
  )

# Drop the inputs now that combined_1910 exists.
rm(OCC_1950, mn_1910, bk_1910)